********************************************************************************
*																			   *
*						Sir! I'd Rather Go to School Sir!					   *
*																			   *
*								 Mahdi Majbouri								   *
*																			   *
*									 Table 1								   *
*																			   *
*									6-6-2023								   *
*																			   *
********************************************************************************

* Load the raw census extract.
use "data.dta", clear


********** EDUCATION
* General attainment (edattain): codes 0 and 9 become missing, then every
* person with lit == 1 is assigned attainment 0.
replace edattain = . if inlist(edattain, 0, 9)
replace edattain = 0 if lit == 1

* Detailed attainment (edattaind): codes 0 and 999 become missing, persons
* with lit == 1 are assigned 0, and persons with lit >= 8 are set to missing.
* In Stata a missing lit also satisfies lit >= 8, so those rows are blanked too.
replace edattaind = . if inlist(edattaind, 0, 999)
replace edattaind = 0 if lit == 1
replace edattaind = . if lit >= 8

* Attainment indicators: 1 when edattaind is in the listed codes, 0 for any
* other non-missing edattaind, and missing when edattaind is missing.
* NOTE(review): the nationally representative sample built later in this file
* also counts code 110 as primary; confirm which definition is intended.
generate primary    = inlist(edattaind, 120, 211) if edattaind != .
generate midschool  = (edattaind == 221)          if edattaind != .
generate prim_mid   = (primary | midschool)       if edattaind != .
generate highschool = inlist(edattaind, 311, 321) if edattaind != .


generate college_plus = inlist(edattaind, 312, 400) if edattaind != .
* Disabled step, kept for reference:
*replace college_plus = . if edattaind ==  0


** All households
* hh is 1 for one observation in each serial group and 0 for the rest, so
* "if hh == 1" counts each household once.
egen hh = tag(serial)
tabulate hh if hh == 1			// Total number of HHs (row 1 in Table 1)

** with head
* head is 1 for persons with relate == 1 and missing otherwise; heads is the
* number of such persons in the household (0 when there is none).
generate head = 1 if relate == 1
bysort serial: egen heads = total(head)
tabulate heads if hh == 1		// HHs with and without head (row 2 in Table 1)

* Discard households without any head, then show the distribution again.
keep if heads != 0
tabulate heads if hh == 1

*** monogamous vs. polygamous hhs
* spouses is the number of persons with relate == 2 in the household.
generate spouse = (relate == 2)
bysort serial: egen spouses = total(spouse)
tabulate spouses if hh == 1		// rows 3-5 in Table 1


** non-nuclear families
* For each member type below, an individual 0/1 flag is built from the
* detailed relationship code (related) and then summed within the household
* (serial) to give a household-level count.

* Sons- and daughters-in-law
gen child_inlaw = (related == 4300)
by serial, sort: egen child_inlaws = total(child_inlaw)

* Head's or spouse's parents.
* Fix: code 4220 used to be tested against relate rather than related. The
* general variable relate is only ever compared with 1, 2 and 3 in this file,
* so that condition could never be true and members coded 4220 were left out
* of the count reported in Table 1.
gen grandparent = (related == 4210 | related == 4220)
by serial, sort: egen grandparents = total(grandparent)

* Head's or spouse's siblings
gen sibling = (related == 4410 | related == 4430)
by serial, sort: egen siblings = total(sibling)

* Other relatives and non-relatives
gen other = (related == 4900 | related == 5900)
by serial, sort: egen others = total(other)

* Anyone who is not coded 1, 2 or 3 on the general relationship variable
* (a missing relate is counted here as well).
gen nonekid = (relate != 1 & relate != 2 & relate != 3)
by serial, sort: egen nonekids = total(nonekid)


** Table 1, rows 6 to 9:
* hh is the one-per-household tag; "== 1" is written out to match the other
* tabulations in this file (hh only takes the values 0 and 1).
tab child_inlaws if hh == 1		// HHs with son or daughter-in-laws
tab grandparents if hh == 1		// HHs with head's or spouse's parents
tab siblings if hh == 1			// HHs with head's or spouse's siblings
tab others if hh == 1			// HHs with other relatives and non-relatives

tab nonekids if hh == 1


** Only keep households with one spouse to best identify sole sons
keep if spouses == 1


** nuclear family: a husband, wife, and their children (no step-children)
* child is 1 for persons with relate == 3 and 0 for everybody else
* (a missing relate also yields 0).
generate child = (relate == 3)

bysort serial: egen children = total(child)
tabulate children if hh == 1


* momloc == 0 is turned into missing so that it is not treated as a mother
* pointer. One observation per distinct (serial, momloc) pair is then tagged;
* egen's tag() leaves rows with a missing momloc untagged, so the household
* sum of the tags is the number of distinct mothers referenced in it.
replace momloc = . if momloc == 0
egen unique_mother = tag(serial momloc)
bysort serial: egen no_of_mothers = total(unique_mother)
keep if no_of_mothers == 1

tabulate no_of_mothers if hh == 1	// Households with a mother and her husband
									// (row 10 in Table 1)


* Keep only households in which nobody falls outside relate 1, 2 or 3
* (nonekids is the household count built in the non-nuclear section).
tabulate nonekids
keep if nonekids == 0


** identifies mothers
* mom_num is the household mean of momloc. Only households with a single
* distinct non-missing momloc were retained earlier in this file, so the mean
* equals that one value.
sort serial
by serial: egen mom_num = mean(momloc)
tabulate mom_num

* Give the mother herself a non-missing momloc (her own pernum).
replace momloc = mom_num if pernum == mom_num


** households with step-children
* A person with relate == 3 whose momloc is still missing is treated as a
* step-child; step_children is the household count of such persons.
tabulate pernum if relate == 3 & momloc == .

generate step_child = (relate == 3 & momloc == .)
bysort serial: egen step_children = total(step_child)

** drop households with step-children
keep if step_children == 0


** kids ever born = kids in the household
* kidsize: number of persons flagged child == 1 in the household.
* kidsborn: the chborn value reported by the person whose pernum equals
* momloc (the mother), spread to every household member via the mean.
*gen child = (relate == 3)
bysort serial: egen kidsize = total(child)
replace chborn = . if chborn >= 98		// codes 98 and above become missing
generate kidborn = chborn if pernum == momloc
bysort serial: egen kidsborn = mean(kidborn)

* Disabled alternative, kept for reference:
*by serial momloc_child, sort: gen d = _N - 1
*by serial momloc_child, sort: egen d2 = total(child)

* Retain households where the two counts agree; households whose kidsborn is
* missing fail the equality and are removed as well.
keep if kidsborn == kidsize

tabulate hh 					// Households with a mother and her husband and
								// their all children present, row 11 in Table 1


								
** Identifying households with sole sons
* female: 1 when sex == 2, 0 for any other non-missing sex, missing otherwise.
gen female = (sex == 2) if sex != .

* son: 1 for a non-female person with relate == 3, 0 otherwise; missing when
* relate or female is missing.
gen son = (relate==3 & female==0) if relate!=. & female!=.

* sons: number of sons in the household. total() replaces the older sum()
* spelling of the same egen function, matching every other household total
* in this file; the result is unchanged.
by serial, sort: egen sons = total(son)

* After the household count is taken, blank the individual flag for everyone
* who is not coded relate == 3.
replace son = . if relate != 3


** Generate father's and mother's education
* father is 1 for a person with relate 1 or 2 and sex == 1 whose pernum is not
* the household's momloc, and 0 for everybody else.
generate father = (inlist(relate, 1, 2) & pernum != momloc & sex == 1)

* Copy the father's edattain to all members of the household.
generate f_ed = edattain if father == 1
bysort serial: egen father_ed = mean(f_ed)

* mom is 1 for the person whose pernum equals momloc; her edattain is copied
* to all members of the household in the same way.
generate mom = (pernum == momloc)
generate m_ed = edattain if mom == 1
bysort serial: egen mom_ed = mean(m_ed)


** Generate mother's age
* Ages of 998 and above are set to missing before any age is used.
replace age = . if age >= 998
generate motherage = age if mom == 1
bysort serial: egen mage = min(motherage)


** Generate father's age
generate fatherage = age if father == 1
bysort serial: egen fage = max(fatherage)


** Generate father's age when the child is 18 (fage18) for all children
* The father's current age is shifted by the gap between 18 and the person's
* own current age; missing when either age is missing.
generate fage18 = 18 + (fage - age)


** Generate the year children turned 18 (y18) for all children
* 2011 is taken as the reference year for age.
* NOTE(review): presumably the census year (the output file is named
* census11) - confirm.
generate y18 = 18 + (2011 - age)


** Keep only urban households
* Rows with urban different from 2 (including a missing urban) are removed.
drop if urban != 2


** Making sure that only male heads are included
* head_sex spreads the sex of the person with relate == 1 to every member of
* the household.
generate hsex = sex if relate == 1
bysort serial: egen head_sex = max(hsex)
tabulate head_sex

drop if head_sex != 1


** Only keep children who were 18 and above 18 during the enactment of the
** exemption law
* Retains persons with 2000 < y18 <= 2010; a missing y18 is removed too,
* because missing compares greater than 2010.
* NOTE(review): this filter is applied to every remaining person, not only
* to those with relate == 3 - confirm that is intended.
drop if y18 <= 2000 | y18 > 2010


** Generate the treatment variable (t)
* t is 1 when the father's age at the child's 18th year is at or below the
* threshold, 0 when it is above, and missing when fage18 is missing.
generate threshold = 58
generate t = (fage18 <= threshold) if fage18 != .


** Generate the polynomial of the running variable and its interactions
* fa is the running variable centred at the threshold. The squared term is
* divided by 10 and the cubic term by 10000. Variables are created in the
* same order as before so the column order of the dataset does not change.
generate fa    = fage18 - threshold
generate fasq  = fa * fa / 10
generate tfa   = t * fa
generate tfasq = t * fasq			// t is 0/1, so this equals t * fa * fa / 10
generate facb  = fa * fa * fa / 10000
generate tfacb = t * facb

* Remove persons younger than 20.
drop if age < 20

* Retain only the analysis variables and write the sample to disk.
keep female age son sons ///
	prim_mid highschool college_plus ///
	mom_ed father_ed fage fage18 mage y18 ///
	threshold relate urban occ school empstat ///
	t fa fasq tfa tfasq facb tfacb

save "census11-all.dta", replace

/*
***** CREATING THE SOLE SONS SAMPLE
keep if sons==1
keep if age >= 20

save "census11-solesons.dta", replace
*/

**** CREATING THE NATIONALLY REPRESENTATIVE SAMPLE
* Start again from the raw extract.
use "data.dta", clear

********** EDUCATION
* Detailed attainment: codes 0 and 999 become missing, persons with lit == 1
* are assigned 0, and persons with lit >= 8 (a missing lit included) are set
* to missing.
replace edattaind = . if inlist(edattaind, 0, 999)
replace edattaind = 0 if lit == 1
replace edattaind = . if lit >= 8

* Attainment indicators, missing wherever edattaind is missing.
* NOTE(review): primary includes code 110 here but not in the first sample
* built at the top of this file - confirm the difference is intended.
generate primary    = inlist(edattaind, 110, 120, 211) if edattaind != .
generate midschool  = (edattaind == 221)               if edattaind != .
generate prim_mid   = (primary | midschool)            if edattaind != .
generate highschool = inlist(edattaind, 311, 321)      if edattaind != .


* college_plus is additionally left missing for edattaind == 0 (the persons
* assigned 0 through lit == 1), so the filter below excludes them.
generate college_plus = inlist(edattaind, 312, 400) if edattaind != . & edattaind != 0



* Ages of 998 and above are set to missing.
replace age = . if age >= 998

* Urban persons aged 20 to 28 with a usable education record.
drop if urban != 2
keep if inrange(age, 20, 28) & edattaind != . & college_plus != .
keep age prim_mid highschool college_plus sex
save "census20-28.dta", replace


********************************************************************************
*********************************  THE END *************************************
********************************************************************************

